Random KFG Jittered Bin Sample:
— Pendant Pendant Sum Monte Carlo Simulation

1. Creating the distributions

1.1 The Existing FieldGuide Distribution

The first dataframe to build is a database of sums using the khipus in the existing KFG.

Code

import numpy as np
import random
from random import choices
import time
import pandas as pd
from pandas import Series, DataFrame

import khipu_kamayuq as kamayuq  # A Khipu Maker is known (in Quechua) as a Khipu Kamayuq
import khipu_qollqa as kq

# Plotly
import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
plotly.offline.init_notebook_mode(connected = False)

from monte_carlo import DiscreteDistributionSampler, PendantSummer, StrawmanKhipu

Code

# Load the KFG khipus, wrap each one as a "KFG"-sourced StrawmanKhipu built
# from the knotted values of its pendant cords, and tabulate one row per khipu.
khipu_dict, all_khipus = kamayuq.fetch_khipus()

strawmen_kfg_khipu = [
    StrawmanKhipu(
        kfg_khipu.name(),
        "KFG",
        [pendant.knotted_value() for pendant in kfg_khipu.pendant_cords()],
    )
    for kfg_khipu in all_khipus
]

kfg_rows = [strawman.dataframe_tuple() for strawman in strawmen_kfg_khipu]
strawmen_kfg_df = pd.DataFrame(kfg_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_kfg_df.head()

	name	source	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
0	AS010	KFG	27	8	10.392305	3	2	5	3.0	1.000000	22.0	9.695360	0.227273	4.333333	0.577350	-6.5	4.949747
1	AS011	KFG	15	92	183.904867	0	0	0	0.0	0.000000	0.0	0.000000	0.000000	0.000000	0.000000	0.0	0.000000
2	AS012	KFG	85	2	5.196152	3	4	7	6.0	2.449490	18.0	7.348469	0.233333	10.333333	6.027714	-9.0	5.099020
3	AS013	KFG	90	4	14.456832	0	5	5	14.0	14.456832	48.0	44.508426	0.121951	0.000000	0.000000	-19.0	8.860023
4	AS014	KFG	42	53	40.137264	1	2	3	2.0	0.000000	99.0	7.000000	0.071429	17.000000	0.000000	-15.0	4.242641

Code

# Summarize the KFG sums: total right/left sum counts, each side's share of
# all sums, and the mean and spread of the per-khipu mean handedness columns.
total_right_sums = sum(strawmen_kfg_df.num_right_sums.tolist())
total_left_sums = sum(strawmen_kfg_df.num_left_sums.tolist())
total_sums = total_right_sums + total_left_sums

# Report 0% for both sides when there are no sums at all (avoids dividing by zero).
left_pct = round(100.0 * total_left_sums / total_sums) if total_sums > 0 else 0
right_pct = round(100.0 * total_right_sums / total_sums) if total_sums > 0 else 0

left_handed_mean = round(strawmen_kfg_df.mean_left_handedness.mean(), 1)
right_handed_mean = round(strawmen_kfg_df.mean_right_handedness.mean(), 1)
left_handed_stdev = round(strawmen_kfg_df.mean_left_handedness.std(), 1)
right_handed_stdev = round(strawmen_kfg_df.mean_right_handedness.std(), 1)

# The counts are printed as plain "right/left"; the former "{total_left_sums=}"
# debug specifier leaked the variable name into the summary line.
print(f"Existing KFG - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums})")
print(f"             - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

Existing KFG - Right/Left Distribution = 54%/46% (4354/total_left_sums=3734)
             - Right/Left Mean Handedness = 9.9/-8.5 ±(15.0/14.2)

1.2 Strawman Khipus based on a Jittered Bin Distribution based on the KFG Values

It could be argued that a uniform distribution is not similar to the actual khipu distribution, which has a lot of low-valued cords.

Accordingly, let’s use a jittered_bin distribution based on a probability density of the discrete samples from the Khipu Field Guide. The distribution should have:

The same number of khipus as the KFG
A pendant cord count chosen randomly from the existing khipus’ pendant cord counts
Pendant values that are randomly generated from a jittered bin distribution based on a discrete distribution of the KFG cord values. For more information see - Discrete Distribution Sampler

Code

# Pendant-cord count of each KFG khipu; sample_kfg_num_cords() draws from this list.
cords_per_khipu = [kfg_khipu.num_pendant_cords() for kfg_khipu in all_khipus]
# Pick the number of cords for a random khipu from the KFG khipu distribution
def sample_kfg_num_cords():
    """Draw a pendant-cord count at random from ``cords_per_khipu``.

    Returns:
        The drawn count, raised to 3 when the draw is smaller, so that
        trivial khipus are never produced.
    """
    (drawn,) = choices(cords_per_khipu, k=1)
    return drawn if drawn >= 3 else 3

# Number of random strawman khipus to produce: one per khipu in all_khipus,
# so the random set is the same size as the KFG set.
num_dummy_khipus = len(all_khipus)

def kfg_cord_distribution_sampler():
    """Build a DiscreteDistributionSampler from the KFG pendant cord values.

    Pools the knotted value of every pendant cord of every khipu in
    ``all_khipus``, keeping only values greater than zero, and passes the
    pooled list to ``DiscreteDistributionSampler``.

    Returns:
        The DiscreteDistributionSampler constructed from the pooled values.
    """
    cord_values = [
        value
        for aKhipu in all_khipus
        for aCord in aKhipu.pendant_cords()
        # Bind the value once so knotted_value() is not evaluated twice per cord.
        if (value := aCord.knotted_value()) > 0
    ]
    return DiscreteDistributionSampler(cord_values)
    
# Generate num_dummy_khipus random khipus. Each gets a cord count drawn by
# sample_kfg_num_cords() and that many cord values drawn from the sampler's
# jittered-bin distribution, rounded to whole numbers.
sampler = kfg_cord_distribution_sampler()

random_names = [f"rjittered_bin_khipu_{i:05d}" for i in range(num_dummy_khipus)]
strawmen_rjittered_bin_khipu = []
for khipu_name in random_names:
    num_cords = sample_kfg_num_cords()
    random_cords = [round(x) for x in sampler.jittered_bin_sample(num_cords)]
    strawmen_rjittered_bin_khipu.append(StrawmanKhipu(khipu_name, "rjittered_bin", random_cords))

rjittered_bin_rows = [strawman.dataframe_tuple() for strawman in strawmen_rjittered_bin_khipu]
strawmen_rjittered_bin_df = pd.DataFrame(rjittered_bin_rows, columns=StrawmanKhipu.dataframe_columns())
strawmen_rjittered_bin_df.head()

	name	source	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
0	rjittered_bin_khipu_00000	rjittered_bin	36	29.277778	41.427811	8	6	14	3.0	1.732051	43.857143	34.357526	0.388889	8.250000	4.062019	-8.333333	5.785038
1	rjittered_bin_khipu_00001	rjittered_bin	5	8.600000	6.985700	0	0	0	0.0	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
2	rjittered_bin_khipu_00002	rjittered_bin	20	272.650000	431.341759	3	1	4	2.0	0.000000	18.500000	4.358899	0.200000	3.333333	1.527525	-2.000000	0.000000
3	rjittered_bin_khipu_00003	rjittered_bin	49	68.877551	107.736104	7	7	14	4.0	2.236068	130.142857	155.251090	0.285714	19.000000	13.904436	-7.285714	7.040698
4	rjittered_bin_khipu_00004	rjittered_bin	21	150.904762	324.194371	0	1	1	3.0	0.000000	28.000000	0.000000	0.047619	0.000000	0.000000	-11.000000	0.000000

Code

# Summarize the random jittered-bin sums the same way as the KFG set: total
# right/left sum counts, each side's share, and the mean and spread of the
# per-khipu mean handedness columns.
total_right_sums = sum(strawmen_rjittered_bin_df.num_right_sums.tolist())
total_left_sums = sum(strawmen_rjittered_bin_df.num_left_sums.tolist())
total_sums = total_right_sums + total_left_sums
print(f"{total_right_sums=} {total_left_sums=}")

# Report 0% for both sides when there are no sums at all (avoids dividing by zero).
left_pct = round(100.0 * total_left_sums / total_sums) if total_sums > 0 else 0
right_pct = round(100.0 * total_right_sums / total_sums) if total_sums > 0 else 0

left_handed_mean = round(strawmen_rjittered_bin_df.mean_left_handedness.mean(), 1)
right_handed_mean = round(strawmen_rjittered_bin_df.mean_right_handedness.mean(), 1)
left_handed_stdev = round(strawmen_rjittered_bin_df.mean_left_handedness.std(), 1)
right_handed_stdev = round(strawmen_rjittered_bin_df.mean_right_handedness.std(), 1)

# The counts are printed as plain "right/left"; the former "{total_left_sums=}"
# debug specifier leaked the variable name into the summary line.
print(f"Random Jittered Bin - Right/Left Distribution = {right_pct}%/{left_pct}% ({total_right_sums}/{total_left_sums})")
print(f"                  - Right/Left Mean Handedness = {right_handed_mean}/{left_handed_mean} ±({right_handed_stdev}/{left_handed_stdev})")

strawmen_rjittered_bin_df.describe()

total_right_sums=9869 total_left_sums=9779
Random Jittered Bin - Right/Left Distribution = 50%/50% (9869/total_left_sums=9779)
                  - Right/Left Mean Handedness = 13.3/-12.7 ±(12.8/12.8)

	num_pendants	mean_cord_value	stdev_cord_value	num_right_sums	num_left_sums	num_sums	mean_num_summands	stdev_num_summands	mean_sum_value	stdev_sum_value	num_sums_per_nonzero_pendant	mean_right_handedness	stdev_right_handedness	mean_left_handedness	stdev_left_handedness
count	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000	650.000000
mean	63.515385	282.958869	1068.862498	15.183077	15.044615	30.227692	2.947692	2.131828	92.664421	194.303311	0.241734	13.265380	9.573296	-12.738037	9.491821
std	86.719713	785.393759	2491.118041	33.209639	33.053715	66.121454	1.815963	2.443491	107.429548	372.856423	0.209714	12.836147	12.396232	12.839228	12.546433
min	3.000000	4.750000	2.872281	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-65.391892	0.000000
25%	16.000000	86.227083	192.500711	0.000000	0.000000	1.000000	2.000000	0.000000	19.125000	0.000000	0.066667	0.000000	0.000000	-19.986842	0.000000
50%	33.000000	167.729497	466.127534	3.000000	3.000000	6.000000	3.000000	1.414214	60.030303	48.024146	0.200000	10.414286	5.576730	-9.875000	5.300662
75%	76.000000	284.649522	1070.312141	14.000000	14.000000	28.750000	4.000000	3.464102	135.172464	231.001191	0.378788	20.541667	14.566347	0.000000	14.553012
max	864.000000	16565.333333	40556.039337	385.000000	373.000000	758.000000	9.000000	12.922848	1055.000000	3763.154765	0.877315	60.072727	76.186790	0.000000	74.303955

We see the handedness expands less than the random uniform, although the standard deviation is still quite high. The number of sums has increased (from random uniform) slightly to 0.042 sums per pendant, 1/4th of the existing KFG.

2. Random Jittered Bin vs. Existing KFG - Graphical Distribution

To graphically compare the distributions of the random khipus with existing khipus, a single combined dataframe is needed:

Code

def source_color(x):
    """Map a source label to a color-scale position: 0.0 for "KFG", 1.0 otherwise."""
    if x == "KFG":
        return 0.0
    return 1.0

# Stack the KFG rows on top of the random rows, then tag every row with the
# numeric color position of its source (see source_color) for the plots.
combined_kfg_rjittered_bin_df = pd.concat([strawmen_kfg_df, strawmen_rjittered_bin_df])
combined_kfg_rjittered_bin_df['source_color'] = list(
    map(source_color, combined_kfg_rjittered_bin_df['source'].values)
)

Code

# Right/left sum totals and percentage split for the KFG khipus.
kfg_right = sum(strawmen_kfg_df['num_right_sums'].tolist())
kfg_left = sum(strawmen_kfg_df['num_left_sums'].tolist())
if kfg_left + kfg_right > 0:
    pct_kfg_left = round(100.0 * kfg_left / (kfg_left + kfg_right))
    pct_kfg_right = round(100.0 * kfg_right / (kfg_left + kfg_right))
else:
    # No sums at all: report 0% for both sides rather than dividing by zero.
    pct_kfg_left = 0
    pct_kfg_right = 0

# The same totals and split for the random jittered-bin khipus.
rjittered_bin_right = sum(strawmen_rjittered_bin_df['num_right_sums'].tolist())
rjittered_bin_left = sum(strawmen_rjittered_bin_df['num_left_sums'].tolist())
all_rjittered_bin = rjittered_bin_left + rjittered_bin_right
if all_rjittered_bin > 0:
    pct_rjittered_bin_left = round(100.0 * rjittered_bin_left / all_rjittered_bin)
    pct_rjittered_bin_right = round(100.0 * rjittered_bin_right / all_rjittered_bin)
else:
    pct_rjittered_bin_left = 0
    pct_rjittered_bin_right = 0

print(f"Num Right/Left Sums for Existing KFG:{kfg_right}/{kfg_left} ({pct_kfg_right}%/{pct_kfg_left}%)")
print(f"Num Right/Left Sums for Random Jittered Bin: {rjittered_bin_right}/{rjittered_bin_left} ({pct_rjittered_bin_right}%/{pct_rjittered_bin_left}%)")

Num Right/Left Sums for Existing KFG:4354/3734 (54%/46%)
Num Right/Left Sums for Random Jittered Bin: 9869/9779 (50%/50%)

Code

# Scatter of right vs. left sum counts per khipu on log-log axes. Marker size
# is the mean number of summands; color follows source_color (0.0 = KFG is the
# first color of the scale, 1.0 = random is the second).
legend_text = "<b>Random Jittered Bin vs KFG - #Sums:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_rjittered_bin_df,
    x="num_right_sums", y="num_left_sums",
    log_x=True, log_y=True,
    size="mean_num_summands",
    opacity=.4,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

As expected, random sums that are small occur more often. They also have fewer summands. Let’s evaluate the number of summands for the random khipus vs. the existing khipus.

Code

# Scatter of mean number of summands (x) vs. number of sums (y, log axis) per
# khipu. Marker size is sums per non-zero pendant; color follows source_color.
# Title typo fixed: "Size-#Sums/Pendan" -> "Size-#Sums/Pendant".
legend_text = "<b>Random Jittered Bin vs KFG - #Sums vs #Summands:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Sums/Pendant</i>"
fig = px.scatter(
    combined_kfg_rjittered_bin_df,
    x="mean_num_summands", y="num_sums",
    log_y=True,
    size="num_sums_per_nonzero_pendant",
    opacity=.4,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

This echoes the previous statement about the number of summands being very different in the Random jittered_bin set! A relatively clear separation occurs.

Code

# Scatter of mean left handedness (x) vs. mean right handedness (y) per khipu.
# Marker size is sums per non-zero pendant; color follows source_color.
legend_text = "<b>Random Jittered Bin vs KFG - Sum Handedness:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Sums/Pendant</i>"
fig = px.scatter(
    combined_kfg_rjittered_bin_df,
    x="mean_left_handedness", y="mean_right_handedness",
    size="num_sums_per_nonzero_pendant",
    opacity=0.3,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name'], title=legend_text,
    width=944, height=944,
)
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

Now we’re getting somewhere. Existing KFG Khipus have their sums close, for obvious reasons. However the randomly generated khipus have many more far sums, with a small number of summands.

Code

# Scatter of sums per non-zero pendant (x) vs. mean sum value (y, log axis)
# per khipu. Marker size is the mean number of summands; color follows
# source_color.
legend_text = "<b>Random Jittered Bin vs KFG - Mean Sum vs #Sums/Pendant:</b><i style=\"font-size:10pt;\"> Blue-KFG, Red-Random, Size-#Summands</i>"
fig = px.scatter(
    combined_kfg_rjittered_bin_df,
    x="num_sums_per_nonzero_pendant", y="mean_sum_value",
    log_y=True,
    size="mean_num_summands",
    opacity=0.5,
    color='source_color', color_continuous_scale=['#3c3fff', '#ff3030'],
    labels={"name": "Khipu Name"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
fig.update_layout(showlegend=False)
fig.update(layout_coloraxis_showscale=False)
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

3. Frequency Distributions

An examination of frequency distributions for key variables, using violin plots, where width=frequency and height=variable being measured.

3.1 Handedness Frequency

Code

# Handedness bias per khipu: number of right sums minus number of left sums.
combined_kfg_rjittered_bin_df['handedness_bias'] = [
    abs(num_right) - abs(num_left)
    for num_right, num_left in zip(
        combined_kfg_rjittered_bin_df['num_right_sums'].tolist(),
        combined_kfg_rjittered_bin_df['num_left_sums'].tolist(),
    )
]
# Relabel the source column for display: every non-KFG row becomes
# "Random jittered_bin". This overwrites the original source labels.
combined_kfg_rjittered_bin_df['source'] = [
    "KFG" if source == 'KFG' else "Random jittered_bin"
    for source in combined_kfg_rjittered_bin_df['source'].tolist()
]

legend_text = "<b>Random Jittered Bin vs KFG - Handedness Bias (#RightHandedSums - #LeftHandedSums)</b>"
fig = px.violin(
    combined_kfg_rjittered_bin_df, y="handedness_bias",
    points='all', color="source",
    hover_data=['name', 'num_sums'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

As another view of handedness, let’s create 1000 sets of 650 sample khipus and compare their overall handedness with that of the KFG.

Code

# Compute Expensive - takes roughly 8 minutes per 100 sets
def make_handedness_sample(set_index, sampler):
    """Generate one set of random jittered-bin khipus and total its sums.

    Args:
        set_index: Index of this set; embedded in each generated khipu name.
        sampler: Object providing ``jittered_bin_sample(n)``, used to draw
            the cord values of each khipu.

    Returns:
        Tuple ``(total_right_sums, total_left_sums)`` summed over the
        ``num_dummy_khipus`` khipus generated for this set.
    """
    strawmen = []
    for khipu_index in range(num_dummy_khipus):
        khipu_name = f"rjittered_bin_khipu_{set_index:05d}_{khipu_index:05d}"
        cord_values = [round(x) for x in sampler.jittered_bin_sample(sample_kfg_num_cords())]
        strawmen.append(StrawmanKhipu(khipu_name, "rjittered_bin", cord_values))

    sample_df = pd.DataFrame(
        [strawman.dataframe_tuple() for strawman in strawmen],
        columns=StrawmanKhipu.dataframe_columns(),
    )
    right_total = sum(sample_df.num_right_sums.tolist())
    left_total = sum(sample_df.num_left_sums.tolist())
    return (right_total, left_total)

def run_handedness_monte_carlo_experiment():
    """Run the handedness Monte Carlo experiment and save the result as CSV.

    Generates ``num_sets`` sample sets with ``make_handedness_sample`` (using
    the module-level ``sampler``), records each set's right-sum count,
    left-sum count and their difference, prints a progress ruler while
    running, and writes the table to ``./CSV/handedness_set.csv``, creating
    the ``./CSV`` directory if it does not exist.

    Returns:
        DataFrame with columns ``num_right_sums``, ``num_left_sums`` and
        ``handedness`` (right minus left), one row per set.
    """
    from pathlib import Path

    handedness_set = []
    for i in range(num_sets):
        right_sums, left_sums = make_handedness_sample(i, sampler)
        handedness_set.append((right_sums, left_sums, right_sums - left_sums))

        # Progress ruler: "." per set, the tens digit every 10th set, and a
        # "(index)" label plus a new line every 100th set.
        if i == 0:
            marker = "\n0"
        elif i % 100 == 0:
            marker = f"({i})\n0"
        elif i % 10 == 0:
            marker = f"{(i // 10) % 10}"
        else:
            marker = "."
        print(marker, end="")

    # Close the ruler with the index of the last set. Skipped when no set was
    # run, because the loop variable would be unbound in that case.
    if handedness_set:
        print(f"({len(handedness_set) - 1})\n", end="")

    handedness_set_df = pd.DataFrame(handedness_set, columns=['num_right_sums', 'num_left_sums', 'handedness'])
    csv_path = Path("./CSV/handedness_set.csv")
    # to_csv does not create missing directories, so make sure ./CSV exists.
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    handedness_set_df.to_csv(csv_path, index=False)
    return handedness_set_df

# Experiment settings. With run_experiment left False the experiment is not
# run and the results saved by an earlier run are loaded.
num_sets = 1000
run_experiment = False
sampler = kfg_cord_distribution_sampler()

if run_experiment:
    run_handedness_monte_carlo_experiment()

# The results are always read back from the CSV, whether or not the experiment
# has just been run.
handedness_set_df = pd.read_csv("./CSV/handedness_set.csv")

Code

# Violin plot of the handedness delta (right sums minus left sums) of every
# random sample set, with the KFG's own delta drawn as a red line.
legend_text = "<b>Handedness Δ By Set</b><i style=\"font-size:.8em;\">- Red=KFG Handedness, Blue=Random Jittered Bin Set Samples</i>"
# KFG handedness delta, derived from the KFG table instead of a hard-coded
# value (4354 right - 3734 left = 620 for the dataset this was written against).
kfg_handedness = sum(strawmen_kfg_df.num_right_sums.tolist()) - sum(strawmen_kfg_df.num_left_sums.tolist())
fig = px.violin(
    handedness_set_df, y="handedness",
    points='all',
    hover_data=['num_right_sums', 'num_left_sums', 'handedness'],
    title=legend_text,
    width=944, height=944,
)
fig.add_hline(y=kfg_handedness, line_width=3, line_color="red")
fig.add_hline(y=800, line_width=.5, line_color="white")  # gives graph some breathing room
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

3.2 Sum Means

Code

# `math` is not among the notebook's imports, so bring it into scope here;
# without it the math.log call below raises NameError.
import math

# Violin plot of the natural log of each khipu's mean sum value, per source.
legend_text = "<b>Violin Plot - Random Jittered Bin vs KFG - Log(Sum Mean)</b>"
# Mean sum values that are not positive (e.g. khipus with no sums) map to 0
# rather than being passed to log.
combined_kfg_rjittered_bin_df['log_mean_sum'] = [
    math.log(x) if x > 0 else 0
    for x in combined_kfg_rjittered_bin_df['mean_sum_value'].tolist()
]
fig = px.violin(
    combined_kfg_rjittered_bin_df, y="log_mean_sum",
    points='all', color="source",
    labels={"log_mean_sum": "Log(Sum Mean)"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

3.3 Number of Sums per Pendant

Code

# Violin plot of the number of sums per non-zero pendant, per source.
legend_text = "<b>Violin Plot - Random Jittered Bin vs KFG - #Sums per Pendant</b>"
fig = px.violin(
    combined_kfg_rjittered_bin_df, y="num_sums_per_nonzero_pendant",
    points='all', color="source",
    labels={"num_sums_per_nonzero_pendant": "#Sums per Pendant"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

3.4 Number of Summands per Sum Pendant

Code

# Violin plot of the mean number of summands per sum, per source.
legend_text = "<b>Violin Plot - Random Jittered Bin vs KFG - #Summands per Sum</b>"
fig = px.violin(
    combined_kfg_rjittered_bin_df, y="mean_num_summands",
    points='all', color="source",
    labels={"mean_num_summands": "#Summands per Sum"},
    hover_data=['name', 'num_sums', 'mean_sum_value'], title=legend_text,
    width=944, height=944,
)
# show() returns None, so it is called as a statement; `fig` keeps the Figure
# instead of being rebound to None.
fig.show()

This is also as you would expect - khipus that are randomly generated tend to have large sum values, few summands, and few sums per pendant cord.